#!/usr/bin/env python
# coding: utf-8

# # NumPy: creating and manipulating numerical data

# ## Section 1: What is NumPy and numpy arrays
#
# **Python** has built-in:
# - containers: lists (costless insertion and append), dictionaries (fast lookup)
# - high-level number objects (integers, floating points)
#
# **NumPy** is:
# - an extension package to Python for multidimensional arrays
# - faster (as you'll see below)
# - convenient and tested by the scientific community

# In[1]:

import numpy as np

# In[2]:

a = np.array([0, 1, 2, 3])
a

# In[3]:

# Baseline timing: square 1000 integers with a pure-Python list comprehension.
l = range(1000)
get_ipython().run_line_magic('timeit', '[i**2 for i in l]')

# In[4]:

# Same computation as In[3], but vectorized by NumPy -- compare the timings.
a = np.arange(1000)
get_ipython().run_line_magic('timeit', 'a**2')

# In[5]:

help(np.array)

# In[6]:

# np.lookfor was removed in NumPy 2.0. Only call it where it still exists so
# that the remaining cells keep running on newer installations.
if hasattr(np, 'lookfor'):
    np.lookfor('create array')
else:
    print("np.lookfor is not available in this NumPy version; "
          "search the online NumPy documentation instead.")

# In[7]:

# Same guard as In[6]: the help target only exists on NumPy < 2.0.
if hasattr(np, 'lookfor'):
    help(np.lookfor)

# ## Creating Arrays
#
# ### 1-Dimensional

# In[8]:

a = np.array([0, 1, 2, 3])
a

# In[9]:

a.ndim

# In[10]:

a.shape

# In[11]:

len(a)

# ### 2-D, 3-D and more

# In[12]:

b = np.array([[0, 1, 2], [3, 4, 5]])
b

# In[13]:

b.ndim

# In[14]:

b.shape

# In[15]:

len(b)  # len() returns the size of the first dimension

# In[16]:

c = np.array([[[1], [2]], [[3], [4]]])
c

# In[17]:

c.shape

# In[18]:

print("Number of dimensions in array c: ", c.ndim)

# ### Evenly spaced

# In[19]:

# Evenly spaced: np.arange(n) yields 0 .. n-1 (it starts at 0, not 1)
a = np.arange(10)
a

# ### or, number of points using linspace

# In[20]:

# Specify the number of points instead of the step
c = np.linspace(0, 1, 9)  # start, end, number of points
c

# In[21]:

d = np.linspace(0, 1, 5, endpoint=False)  # endpoint=False: the end value 1 is excluded
d

# ### Common arrays

# In[22]:

a = np.ones(shape=(3, 3))  # the shape is passed as a tuple
a

# In[23]:

type(a)

# In[24]:

b = np.zeros(shape=(2, 2))
b

# In[25]:

c = np.eye(3)  # identity matrix
c

# In[26]:

d = np.diag(np.array([1, 2, 3, 4]))  # the given values go on the main diagonal
d

# ### Random numbers

# In[27]:

e = np.random.rand(4)  # uniform samples from the half-open interval [0, 1)
e

# In[28]:

f = np.random.randn(4)  # Gaussian (standard normal) samples
f
# type help(np.random.randn) to understand more

# In[29]:

np.random.seed(1234)  # fix the random seed
help(np.random.seed)

# In[ ]:


# ## Exercise 1
#
# **Create an array that looks like this:**
#
# $$x =
# \begin{bmatrix}
# 1 & 1 & 1 & 1 \\
# 1 & 1 & 1 & 1 \\
# 1 & 1 & 1 & 8 \\
# 1 & 6 & 1 & 1 \\
# \end{bmatrix}\tag{1}$$ and,
#
# **another one** that looks like this:
#
# $$y =
# \begin{bmatrix}
# 0. & 0. & 0. & 0. & 0.\\
# 7. & 0. & 0. & 0. & 0.\\
# 0. & 8. & 0. & 0. & 0.\\
# 0. & 0. & 9. & 0. & 0.\\
# 0. & 0. & 0. & 10. & 0.\\
# 0. & 0. & 0. & 0. & 11.\\
# \end{bmatrix}\tag{2}$$
#
# and lastly,
#
# **create** this simple array
#
# $$\begin{bmatrix}
# 0. & 0. & 0. & 0. & 0.\\
# 0. & 0. & 0. & 0. & 0.\\
# 1. & 0. & 0. & 0. & 0.\\
# 0. & 1. & 0. & 0. & 0.\\
# 0. & 0. & 1. & 0. & 0.\\
# 0. & 0. & 0. & 1. & 0.\\
# \end{bmatrix}\tag{3}$$

# In[30]:

help(np.eye)
help(np.diag)

# ### Basic Data Types

# In[31]:

a = np.array([1, 2, 3])
a.dtype

# In[32]:

b = np.array([1., 2., 3.])
b.dtype

# ### Note: Datatype and performance
#
# Remember: different datatypes allow us to store data more compactly, but most
# of the time NumPy auto-detects the datatype from the input, so you rarely need
# to spell it out yourself.
# In[33]:

# Request the datatype explicitly
c = np.array([1, 2, 3], dtype=float)
c.dtype

# In[34]:

# Arrays built by np.ones default to a floating point datatype
a = np.ones((3, 3))
a.dtype

# ### More datatypes
#
# **Complex**
#
# **Bool**
#
# **Strings**
#
# **Integers**

# In[35]:

# Complex
d = np.array([1 + 2j, 4 + 5j, 6 + 8 * 1j])
d.dtype

# In[36]:

# Bool
e = np.array([True, False, False, True])
e.dtype

# In[37]:

# Strings
f = np.array(['Bonjour', 'Hi', 'Hola', 'Ole', 'Namaste Ji'])
f.dtype  # string dtype sized for the longest entry (10 characters here)

# ## Some Basic Visualization

# In[38]:

import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

# In[39]:

x = np.linspace(0, 3, 20)
y = np.linspace(0, 9, 20)
plt.plot(x, y)  # draws a line through the points

# In[40]:

plt.plot(x, y, 'o')  # same data, drawn as circle markers

# ### 2D arrays

# In[41]:

image = np.random.rand(40, 40)
plt.imshow(image, cmap=plt.cm.Blues)
plt.colorbar()

# In[42]:

# or, with a different colormap...
plt.imshow(image, cmap=plt.cm.hot)

# ## Indexing and Slicing
#
# - In 2D, the first dimension corresponds to rows, the second to columns.
# - For a multidimensional array a, a[0] is interpreted by taking all elements
#   in the unspecified dimensions.
# In[43]:

a = np.arange(10)
a  # indexing begins at 0, unlike Fortran where it begins at 1

# In[44]:

# For multi-dimensional arrays, indices are tuples of integers
a = np.diag(np.arange(3))
a

# In[45]:

a[1, 1]

# In[46]:

a[1]  # a single index selects a whole row

# In[47]:

a[2, 1] = 10  # overwrites the element in the third row, second column

# In[48]:

a

# In[49]:

# Slicing
a = np.arange(10)
a

# In[50]:

a[2:9:3]  # [start:end:step]

# In[51]:

a[:4]  # note: the end index is not included

# In[52]:

# None of the three slice components is required: start, end and step may
# each be omitted.

# ### An illustration of NumPy indexing and slicing
#
#

# In[53]:

# Column vector (0, 10, ..., 50) broadcast against the row (0 .. 5): a 6x6 grid
a = np.arange(6) + np.arange(0, 51, 10)[:, np.newaxis]
print("Answer to above questions is: ")
a

# In[54]:

print("Orange is: ", a[0, 3:5])
# Try the other selections similarly to get more answers, and play with this
# array a bit more

# In[55]:

np.diag(np.tile(4, 4))

# In[56]:

x = np.array([0, 1, 2])
np.tile(x, 4)

# In[57]:

y = np.array([[1, 2], [3, 4]])
np.tile(y, 3)

# ### Solve this puzzle to arrive at the following answer:
#
# $$x =
# \begin{bmatrix}
# 4 & 5 & 4 & 5 & 4 & 5 \\
# 6 & 7 & 6 & 7 & 6 & 7 \\
# 4 & 5 & 4 & 5 & 4 & 5 \\
# 6 & 7 & 6 & 7 & 6 & 7 \\
# \end{bmatrix}\tag{1}$$
#
# **Hint**: play with np.tile()
#
# ##### I will publish a full solution page later...

# ## Copies and views
#
# A slicing operation creates a view on the original array, which is just a way
# of accessing array data. Thus the original array is not copied in memory. You
# can use `np.may_share_memory()` to check if two arrays share the same memory
# block. Note however, that this uses heuristics and may give you false
# positives.

# In[58]:

a = np.arange(10)
a  # the integers 0 to 9 -- 10 elements in total

# In[59]:

b = a[::2]
b  # step 2 takes every 2nd element: 0, 2, 4, 6, 8

# In[60]:

np.may_share_memory(a, b)

# In[61]:

b[0] = 12  # replace the first element (0) with 12
b

# In[62]:

# So what is a then? b is a view, so the write above is visible through a too.
a

# In[63]:

# Let's rearrange
a = np.arange(10)
c = a[::2].copy()  # force a copy instead of a view
c[0] = 12
a

# #### So, what happened?
#
# Let's do it step by step

# In[64]:

a = np.arange(10)
a

# In[65]:

c = a[::2].copy()  # an independent copy of every 2nd element
c

# In[66]:

c[0] = 12
c

# In[67]:

a  # unchanged: the write went to the copy only

# #### They simply don't share the same memory block: `c` is a copy, so changing it leaves `a` untouched and the check below returns False.
#

# In[68]:

np.may_share_memory(a, c)

# ## Fancy Indexing
#
# NumPy arrays can be indexed with slices, but also with boolean or integer
# arrays **(masks)**. This method is called fancy indexing. It creates copies
# **not** views.

# In[69]:

np.random.seed(3)
a = np.random.randint(0, 20, 15)
a

# In[70]:

(a % 3 == 0)

# In[71]:

mask = (a % 3 == 0)
get_from_a = a[mask]
get_from_a  # the sub-array of elements selected by the mask

# In[72]:

# Indexing with a mask is also handy for assigning a new value to a sub-array
a[a % 3 == 0] = -1
a

# ## Indexing with array of integers

# In[73]:

a = np.arange(10)
idx = np.array([[3, 4], [6, 7]])

# In[74]:

idx.shape

# In[75]:

a[idx]  # the result takes the shape of the index array

# ## Here's another fun exercise to play with...
#
# This image shows various ways to index, play with these and come back with your own results 👇
#
#
#
# and your little exercise could be:
#
# ### Exercise 2 : Reproduce this fancy index

# # SOLUTIONS
#
# ## Exercise 1, Solutions 1, 2, 3
#
# **Hint**: use help(np.diag) for info.
#
# and also try out some more yourself!
#
# **NOTE**: As this NumPy lecture expands, the exercise portion will expand too
# and the solutions will be provided in another ipynb notebook later

# ### --------------------------------------------------------------------------------------------------------------###
# # Solutions and explanations to exercises
# ### --------------------------------------------------------------------------------------------------------------###

# ### Exercise 1: Reference for the solutions (np.diag documentation)

# In[76]:

help(np.diag)

# ### Exercise 1: Solution 1

# In[77]:

a = np.ones((4, 4), dtype=int)
a[3, 1] = 6
a[2, 3] = 8
# Both assignments can also be done together with fancy indexing, listing the
# row indices first and the column indices second:
#     a[[3, 2], [1, 3]] = [6, 8]
print(a)

# ### Exercise 1: Solution 2

# In[78]:

b = np.zeros((6, 5))
b[1:] = np.diag(np.arange(7, 12))  # rows 1..5 receive the 5x5 diagonal 7..11
b

# ### Exercise 1: Solution 3

# In[79]:

y = np.eye(6, 5, k=-2, dtype=float)  # k=-2 puts the ones two rows below the main diagonal
y

# In[ ]: